{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Spacy\n",
    "\n",
    "spaCy is an open-source software library for advanced Natural Language Processing, written in the programming languages Python and Cython. It is slightly more ready enterprise use cases. Its out of box POS tagger, NER analyzers are very popular. \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentence = \"\"\"European authorities fined Google a record $5.1 billion on Wednesday for \n",
    "abusing its power in the mobile phone market and ordered the company to alter its practices\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('European', 'NORP'),\n",
       " ('Google', 'ORG'),\n",
       " ('$5.1 billion', 'MONEY'),\n",
       " ('Wednesday', 'DATE')]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"\n",
    "Install spacy\n",
    "$ pip install spacy\n",
    "\n",
    "Download en_core_web_sm module\n",
    "$ python -m spacy download en_core_web_sm\n",
    "\n",
    "\"\"\"\n",
    "\n",
    "import spacy\n",
    "from spacy import displacy\n",
    "from collections import Counter\n",
    "import en_core_web_sm\n",
    "nlp = en_core_web_sm.load()\n",
    "\n",
    "doc = nlp(sentence)\n",
    "[(X.text, X.label_) for X in doc.ents]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Display dependency graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"b5df94673d034d45b0635465062a8b7e-0\" class=\"displacy\" width=\"3530\" height=\"617.0\" direction=\"ltr\" style=\"max-width: none; height: 617.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">European</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">ADJ</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"170\">authorities</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"170\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"290\">fined</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"290\">VERB</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"410\">Google</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"410\">PROPN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"530\">a</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"530\">DET</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"650\">record</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"650\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"770\">$</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"770\">SYM</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"890\">5.1</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"890\">NUM</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1010\">billion</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1010\">NUM</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1130\">on</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1130\">ADP</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1250\">Wednesday</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1250\">PROPN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1370\">for</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1370\">ADP</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1490\">\n",
       "</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1490\">SPACE</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1610\">abusing</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1610\">VERB</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1730\">its</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1730\">DET</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1850\">power</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1850\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1970\">in</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1970\">ADP</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2090\">the</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2090\">DET</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2210\">mobile</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2210\">ADJ</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2330\">phone</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2330\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2450\">market</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2450\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2570\">and</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2570\">CCONJ</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2690\">ordered</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2690\">VERB</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2810\">the</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2810\">DET</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2930\">company</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2930\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3050\">to</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3050\">PART</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3170\">alter</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3170\">VERB</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3290\">its</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3290\">DET</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"527.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3410\">practices</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3410\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-0\" stroke-width=\"2px\" d=\"M70,482.0 C70,422.0 135.0,422.0 135.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M70,484.0 L62,472.0 78,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-1\" stroke-width=\"2px\" d=\"M190,482.0 C190,422.0 255.0,422.0 255.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M190,484.0 L182,472.0 198,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-2\" stroke-width=\"2px\" d=\"M310,482.0 C310,422.0 375.0,422.0 375.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M375.0,484.0 L383.0,472.0 367.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-3\" stroke-width=\"2px\" d=\"M550,482.0 C550,422.0 615.0,422.0 615.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M550,484.0 L542,472.0 558,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-4\" stroke-width=\"2px\" d=\"M310,482.0 C310,302.0 625.0,302.0 625.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">npadvmod</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M625.0,484.0 L633.0,472.0 617.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-5\" stroke-width=\"2px\" d=\"M790,482.0 C790,362.0 980.0,362.0 980.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">quantmod</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M790,484.0 L782,472.0 798,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-6\" stroke-width=\"2px\" d=\"M910,482.0 C910,422.0 975.0,422.0 975.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M910,484.0 L902,472.0 918,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-7\" stroke-width=\"2px\" d=\"M670,482.0 C670,302.0 985.0,302.0 985.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nummod</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M985.0,484.0 L993.0,472.0 977.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-8\" stroke-width=\"2px\" d=\"M310,482.0 C310,182.0 1115.0,182.0 1115.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-8\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1115.0,484.0 L1123.0,472.0 1107.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-9\" stroke-width=\"2px\" d=\"M1150,482.0 C1150,422.0 1215.0,422.0 1215.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-9\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1215.0,484.0 L1223.0,472.0 1207.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-10\" stroke-width=\"2px\" d=\"M310,482.0 C310,62.0 1365.0,62.0 1365.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-10\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1365.0,484.0 L1373.0,472.0 1357.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-11\" stroke-width=\"2px\" d=\"M1390,482.0 C1390,422.0 1455.0,422.0 1455.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-11\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\"></textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1455.0,484.0 L1463.0,472.0 1447.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-12\" stroke-width=\"2px\" d=\"M310,482.0 C310,2.0 1610.0,2.0 1610.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-12\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advcl</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1610.0,484.0 L1618.0,472.0 1602.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-13\" stroke-width=\"2px\" d=\"M1750,482.0 C1750,422.0 1815.0,422.0 1815.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-13\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">poss</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1750,484.0 L1742,472.0 1758,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-14\" stroke-width=\"2px\" d=\"M1630,482.0 C1630,362.0 1820.0,362.0 1820.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-14\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1820.0,484.0 L1828.0,472.0 1812.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-15\" stroke-width=\"2px\" d=\"M1870,482.0 C1870,422.0 1935.0,422.0 1935.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-15\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M1935.0,484.0 L1943.0,472.0 1927.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-16\" stroke-width=\"2px\" d=\"M2110,482.0 C2110,302.0 2425.0,302.0 2425.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-16\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2110,484.0 L2102,472.0 2118,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-17\" stroke-width=\"2px\" d=\"M2230,482.0 C2230,362.0 2420.0,362.0 2420.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-17\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2230,484.0 L2222,472.0 2238,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-18\" stroke-width=\"2px\" d=\"M2350,482.0 C2350,422.0 2415.0,422.0 2415.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-18\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2350,484.0 L2342,472.0 2358,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-19\" stroke-width=\"2px\" d=\"M1990,482.0 C1990,242.0 2430.0,242.0 2430.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-19\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2430.0,484.0 L2438.0,472.0 2422.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-20\" stroke-width=\"2px\" d=\"M1630,482.0 C1630,122.0 2560.0,122.0 2560.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-20\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2560.0,484.0 L2568.0,472.0 2552.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-21\" stroke-width=\"2px\" d=\"M1630,482.0 C1630,62.0 2685.0,62.0 2685.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-21\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2685.0,484.0 L2693.0,472.0 2677.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-22\" stroke-width=\"2px\" d=\"M2830,482.0 C2830,422.0 2895.0,422.0 2895.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-22\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2830,484.0 L2822,472.0 2838,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-23\" stroke-width=\"2px\" d=\"M2710,482.0 C2710,362.0 2900.0,362.0 2900.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-23\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2900.0,484.0 L2908.0,472.0 2892.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-24\" stroke-width=\"2px\" d=\"M3070,482.0 C3070,422.0 3135.0,422.0 3135.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-24\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">aux</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M3070,484.0 L3062,472.0 3078,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-25\" stroke-width=\"2px\" d=\"M2710,482.0 C2710,242.0 3150.0,242.0 3150.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-25\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">xcomp</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M3150.0,484.0 L3158.0,472.0 3142.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-26\" stroke-width=\"2px\" d=\"M3310,482.0 C3310,422.0 3375.0,422.0 3375.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-26\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">poss</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M3310,484.0 L3302,472.0 3318,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-b5df94673d034d45b0635465062a8b7e-0-27\" stroke-width=\"2px\" d=\"M3190,482.0 C3190,362.0 3380.0,362.0 3380.0,482.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-b5df94673d034d45b0635465062a8b7e-0-27\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M3380.0,484.0 L3388.0,472.0 3372.0,472.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "displacy.render(nlp(str(sentence)), style='dep', jupyter = True, options = {'distance': 120})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('European', 'ADJ', 'european'),\n",
       " ('authorities', 'NOUN', 'authority'),\n",
       " ('fined', 'VERB', 'fine'),\n",
       " ('Google', 'PROPN', 'Google'),\n",
       " ('record', 'NOUN', 'record'),\n",
       " ('$', 'SYM', '$'),\n",
       " ('5.1', 'NUM', '5.1'),\n",
       " ('billion', 'NUM', 'billion'),\n",
       " ('Wednesday', 'PROPN', 'Wednesday'),\n",
       " ('\\n', 'SPACE', '\\n'),\n",
       " ('abusing', 'VERB', 'abuse'),\n",
       " ('power', 'NOUN', 'power'),\n",
       " ('mobile', 'ADJ', 'mobile'),\n",
       " ('phone', 'NOUN', 'phone'),\n",
       " ('market', 'NOUN', 'market'),\n",
       " ('ordered', 'VERB', 'order'),\n",
       " ('company', 'NOUN', 'company'),\n",
       " ('alter', 'VERB', 'alter'),\n",
       " ('practices', 'NOUN', 'practice')]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[(x.orth_,x.pos_, x.lemma_) \n",
    "     for x in [y for y in nlp(sentence) \n",
    "               if not y.is_stop and y.pos_ != 'PUNCT']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's see how to create text classifier using nltk and scikit learn."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
